In [17]:
import pandas as pd
import numpy as np
import seaborn as sns
import geopandas as gpd
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib.colors as colors
import matplotlib.patches as mpatches
from pandas import cut
from matplotlib import colormaps as cmap
import folium

%matplotlib inline
plt.rcParams['figure.figsize'] = (10, 8)
plt.style.use("ggplot")
In [18]:
health_cols = [
    'Census tract 2010 ID', 
    'County Name', 
    'State/Territory',
    'Total population', 
    'Percent American Indian / Alaska Native', 
    'Percent Asian', 'Percent Black or African American alone', 
    'Percent Hispanic or Latino', 
    'Percent Native Hawaiian or Pacific',
    'Percent other races', 
    'Percent White',
    'Coronary heart disease among adults aged greater than or equal to 18 years', 
    'Coronary heart disease among adults aged greater than or equal to 18 years (percentile)', 
    'Current asthma among adults aged greater than or equal to 18 years', 
    'Current asthma among adults aged greater than or equal to 18 years (percentile)',
    'Diagnosed diabetes among adults aged greater than or equal to 18 years',
    'Diagnosed diabetes among adults aged greater than or equal to 18 years (percentile)',
    'Low life expectancy (percentile)', 
    'Percent age 10 to 64', 
    'Percent age over 64', 
    'Percent age under 10'
]
In [19]:
health_stats = pd.read_csv(r"C:\\New_499_Code\\499_Cleaned_Abbreviated_CEJST_Disadvantaged_Communities_Data.csv", usecols=health_cols)
health_stats.head(2)
Out[19]:
Census tract 2010 ID County Name State/Territory Percent Black or African American alone Percent American Indian / Alaska Native Percent Asian Percent Native Hawaiian or Pacific Percent White Percent Hispanic or Latino Percent other races ... Percent age 10 to 64 Percent age over 64 Total population Current asthma among adults aged greater than or equal to 18 years (percentile) Current asthma among adults aged greater than or equal to 18 years Diagnosed diabetes among adults aged greater than or equal to 18 years (percentile) Diagnosed diabetes among adults aged greater than or equal to 18 years Coronary heart disease among adults aged greater than or equal to 18 years (percentile) Coronary heart disease among adults aged greater than or equal to 18 years Low life expectancy (percentile)
0 1001020100 Autauga County Alabama 0.07 0.0 0.0 0.00 0.83 0.01 0.0 ... 0.76 0.13 1993.0 57.0 990.0 60.0 1130.0 59.0 640.0 89.0
1 1001020200 Autauga County Alabama 0.57 0.0 0.0 0.01 0.38 0.01 0.0 ... 0.73 0.14 1959.0 82.0 1100.0 83.0 1420.0 49.0 590.0 65.0

2 rows × 21 columns

In [20]:
CEJST_shapefile_path = r"C:\New_499_Code\assets\cb_2021_us_tract_500k\cb_2021_us_tract_500k.shp"
CEJST_Shapefile = gpd.read_file(CEJST_shapefile_path)

# Filter the GeoDataFrame because it takes 20 minutes to run otherwise
filtered_CEJST_Shapefile = CEJST_Shapefile[CEJST_Shapefile['STATE_NAME'].isin(['Tennessee'])]
filtered_CEJST_Shapefile.head(2)
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
Cell In[20], line 2
      1 CEJST_shapefile_path = r"C:\New_499_Code\assets\cb_2021_us_tract_500k\cb_2021_us_tract_500k.shp"
----> 2 CEJST_Shapefile = gpd.read_file(CEJST_shapefile_path)
      4 # Filter the GeoDataFrame because it takes 20 minutes to run otherwise
      5 filtered_CEJST_Shapefile = CEJST_Shapefile[CEJST_Shapefile['STATE_NAME'].isin(['Tennessee'])]

File c:\School\Spring2024\Spring2024Env\lib\site-packages\geopandas\io\file.py:297, in _read_file(filename, bbox, mask, rows, engine, **kwargs)
    294     else:
    295         path_or_bytes = filename
--> 297     return _read_file_fiona(
    298         path_or_bytes, from_bytes, bbox=bbox, mask=mask, rows=rows, **kwargs
    299     )
    301 else:
    302     raise ValueError(f"unknown engine '{engine}'")

File c:\School\Spring2024\Spring2024Env\lib\site-packages\geopandas\io\file.py:395, in _read_file_fiona(path_or_bytes, from_bytes, bbox, mask, rows, where, **kwargs)
    391     df = pd.DataFrame(
    392         [record["properties"] for record in f_filt], columns=columns
    393     )
    394 else:
--> 395     df = GeoDataFrame.from_features(
    396         f_filt, crs=crs, columns=columns + ["geometry"]
    397     )
    398 for k in datetime_fields:
    399     as_dt = None

File c:\School\Spring2024\Spring2024Env\lib\site-packages\geopandas\geodataframe.py:641, in GeoDataFrame.from_features(cls, features, crs, columns)
    638 if hasattr(feature, "__geo_interface__"):
    639     feature = feature.__geo_interface__
    640 row = {
--> 641     "geometry": shape(feature["geometry"]) if feature["geometry"] else None
    642 }
    643 # load properties
    644 properties = feature["properties"]

File c:\School\Spring2024\Spring2024Env\lib\site-packages\shapely\geometry\geo.py:101, in shape(context)
     99     return LinearRing(ob["coordinates"])
    100 elif geom_type == "polygon":
--> 101     return Polygon(ob["coordinates"][0], ob["coordinates"][1:])
    102 elif geom_type == "multipoint":
    103     return MultiPoint(ob["coordinates"])

File c:\School\Spring2024\Spring2024Env\lib\site-packages\shapely\geometry\polygon.py:230, in Polygon.__new__(self, shell, holes)
    228     return shell
    229 else:
--> 230     shell = LinearRing(shell)
    232 if holes is not None:
    233     if len(holes) == 0:
    234         # shapely constructor cannot handle holes=[]

File c:\School\Spring2024\Spring2024Env\lib\site-packages\shapely\geometry\polygon.py:104, in LinearRing.__new__(self, coordinates)
     99 if len(coordinates) == 0:
    100     # empty geometry
    101     # TODO better constructor + should shapely.linearrings handle this?
    102     return shapely.from_wkt("LINEARRING EMPTY")
--> 104 geom = shapely.linearrings(coordinates)
    105 if not isinstance(geom, LinearRing):
    106     raise ValueError("Invalid values passed to LinearRing constructor")

File c:\School\Spring2024\Spring2024Env\lib\site-packages\shapely\decorators.py:77, in multithreading_enabled.<locals>.wrapped(*args, **kwargs)
     75     for arr in array_args:
     76         arr.flags.writeable = False
---> 77     return func(*args, **kwargs)
     78 finally:
     79     for arr, old_flag in zip(array_args, old_flags):

File c:\School\Spring2024\Spring2024Env\lib\site-packages\shapely\creation.py:171, in linearrings(coords, y, z, indices, out, **kwargs)
    169 coords = _xyz_to_coords(coords, y, z)
    170 if indices is None:
--> 171     return lib.linearrings(coords, out=out, **kwargs)
    172 else:
    173     return simple_geometries_1d(coords, indices, GeometryType.LINEARRING, out=out)

KeyboardInterrupt: 
In [ ]:
#merge the environmental data with the shapefile, keep all columns 
health_stats = filtered_CEJST_Shapefile.merge(health_stats, left_on='NAMELSADCO', right_on='County Name', how='left')
health_stats.head(2)
Out[ ]:
STATEFP COUNTYFP TRACTCE AFFGEOID GEOID NAME NAMELSAD STUSPS NAMELSADCO STATE_NAME ... Percent age 10 to 64 Percent age over 64 Total population Current asthma among adults aged greater than or equal to 18 years (percentile) Current asthma among adults aged greater than or equal to 18 years Diagnosed diabetes among adults aged greater than or equal to 18 years (percentile) Diagnosed diabetes among adults aged greater than or equal to 18 years Coronary heart disease among adults aged greater than or equal to 18 years (percentile) Coronary heart disease among adults aged greater than or equal to 18 years Low life expectancy (percentile)
0 47 037 015805 1400000US47037015805 47037015805 158.05 Census Tract 158.05 TN Davidson County Tennessee ... 0.72 0.10 5566.0 40.0 930.0 20.0 810.0 19.0 440.0 20.0
1 47 037 015805 1400000US47037015805 47037015805 158.05 Census Tract 158.05 TN Davidson County Tennessee ... 0.72 0.19 7593.0 24.0 869.0 38.0 950.0 49.0 590.0 18.0

2 rows × 35 columns

In [ ]:
#check to see if health_stats is a geoDataFrame
type(health_stats)
Out[ ]:
geopandas.geodataframe.GeoDataFrame
In [ ]:
health_stats_filtered = health_stats[health_stats['STATE_NAME'] == 'Tennessee']
In [ ]:
 
In [ ]:
print(health_stats.crs)
EPSG:4269
In [ ]:
from sklearn.preprocessing import MinMaxScaler
health_stats = health_stats.applymap(lambda x: 1 if x is True else (0 if x is False else x))

# Normalize the columns using MinMaxScaler (scaling values health_stats 0 and 1)
scaler = MinMaxScaler()

# Selecting the columns to scale
columns_to_scale = [
    'Coronary heart disease among adults aged greater than or equal to 18 years',
    'Current asthma among adults aged greater than or equal to 18 years',
    'Diagnosed diabetes among adults aged greater than or equal to 18 years'
]

# Scaling the columns
health_stats[columns_to_scale] = scaler.fit_transform(health_stats[columns_to_scale])
                                                                                                                                                                      

# Calculating the Health Score by combining the scaled metrics
# Using equal weights for now; this can be adjusted based on importance
health_stats['Health Score'] = (
    health_stats['Coronary heart disease among adults aged greater than or equal to 18 years'] +
    health_stats['Current asthma among adults aged greater than or equal to 18 years'] +
    health_stats['Diagnosed diabetes among adults aged greater than or equal to 18 years'] 
) / 3  # Dividing by 3 


# Displaying the results
print(health_stats[['Census tract 2010 ID', 'County Name', 'Health Score']])
C:\Users\Kassidi\AppData\Local\Temp\ipykernel_32636\4246406488.py:2: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.
  health_stats = health_stats.applymap(lambda x: 1 if x is True else (0 if x is False else x))
        Census tract 2010 ID      County Name  Health Score
0                37057060101  Davidson County      0.220487
1                37057060102  Davidson County      0.245786
2                37057060201  Davidson County      0.318352
3                37057060202  Davidson County      0.365253
4                37057060203  Davidson County      0.280844
...                      ...              ...           ...
314983           48313000200   Madison County      0.318275
314984           48313000300   Madison County      0.308442
314985           48313000400   Madison County      0.326354
314986           51113930100   Madison County      0.337121
314987           51113930200   Madison County      0.324577

[314988 rows x 3 columns]
In [ ]:
from shapely.ops import unary_union

# Create a copy of the GeoDataFrame
health_stats_copy = health_stats.copy()

# Use 'unary_union' to combine multiple geometries for the same county
health_stats_copy = health_stats_copy.groupby(['County Name', 'State/Territory']).agg(
    {
        'Total population': 'mean',
        'Percent American Indian / Alaska Native': 'mean',
        'Percent Asian': 'mean',
        'Percent Black or African American alone': 'mean',
        'Percent Hispanic or Latino': 'mean',
        'Percent Native Hawaiian or Pacific': 'mean',
        'geometry': lambda x: unary_union(x),  # Combine multiple geometries
        'Coronary heart disease among adults aged greater than or equal to 18 years': 'mean',
        'Coronary heart disease among adults aged greater than or equal to 18 years (percentile)': 'mean',
        'Current asthma among adults aged greater than or equal to 18 years': 'mean',
        'Current asthma among adults aged greater than or equal to 18 years (percentile)': 'mean',
        'Diagnosed diabetes among adults aged greater than or equal to 18 years': 'mean',
        'Diagnosed diabetes among adults aged greater than or equal to 18 years (percentile)': 'mean',
        'Low life expectancy (percentile)': 'mean',
        'Percent age 10 to 64': 'mean',
        'Percent age over 64': 'mean',
        'Percent age under 10': 'mean',
        'Health Score': 'mean'
    }
).reset_index()

# Convert the dataframe back to a GeoDataFrame
health_stats_copy = gpd.GeoDataFrame(health_stats_copy)

# Check if the GeoDataFrame is correct
type(health_stats_copy)
Out[ ]:
geopandas.geodataframe.GeoDataFrame
In [ ]:
county_health_map = health_stats_copy.set_geometry('geometry') 

# Create the map with the first layer (Health Score)
county_health_map = health_stats_copy.explore(
    column="Health Score",
    scheme="naturalbreaks", 
    legend=False, 
    k=5, 
    tooltip=False, 
    popup=['County Name', 'Health Score'],
    legend_kwds=dict(colorbar=False),  
    name="Health Score",  
    width="80%",  
    height="500px"  
)

# Add the tile layer to the map
folium.TileLayer("CartoDB positron", show=False).add_to(county_health_map)

# Add the second layer (Energy Burden) to the same map
health_stats_copy.explore(
    m=county_health_map,  
    column="Coronary heart disease among adults aged greater than or equal to 18 years",
    scheme="naturalbreaks", 
    legend=False, 
    k=5, 
    tooltip=False, 
    popup=['County Name', 'Coronary heart disease among adults aged greater than or equal to 18 years'],
    legend_kwds=dict(colorbar=False),  
    name="Coronary heart disease among adults aged greater than or equal to 18 years", 
    cmap="Reds" 
)

# Add a Layer Control to toggle layers
folium.LayerControl().add_to(county_health_map)

# Display the map
county_health_map
Out[ ]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [ ]:
#create a horizontal bar chart for the top 5 counties with highest diabetes rates
top_5_diabetes = health_stats_copy.nlargest(5, 'Diagnosed diabetes among adults aged greater than or equal to 18 years')
top_5_diabetes = top_5_diabetes.sort_values('Diagnosed diabetes among adults aged greater than or equal to 18 years', ascending=True)

plt.barh(top_5_diabetes['County Name'], top_5_diabetes['Diagnosed diabetes among adults aged greater than or equal to 18 years'], color='skyblue')
plt.xlabel('Diagnosed diabetes among adults aged greater than or equal to 18 years')
plt.ylabel('County Name')
plt.title('Top 5 Counties with Highest Diabetes Rates')
plt.show()
No description has been provided for this image
In [ ]:
#visualize the distribution of health scores
plt.hist(health_stats_copy['Health Score'], bins=20, color='purple')
plt.xlabel('Health Score')
plt.ylabel('Frequency')
plt.title('Distribution of Health Scores')
plt.show()
No description has been provided for this image
In [22]:
%pip install nbconvert
Collecting nbconvert
  Downloading nbconvert-7.16.4-py3-none-any.whl.metadata (8.5 kB)
Requirement already satisfied: beautifulsoup4 in c:\school\spring2024\spring2024env\lib\site-packages (from nbconvert) (4.12.3)
Requirement already satisfied: bleach!=5.0.0 in c:\school\spring2024\spring2024env\lib\site-packages (from nbconvert) (6.1.0)
Requirement already satisfied: defusedxml in c:\school\spring2024\spring2024env\lib\site-packages (from nbconvert) (0.7.1)
Requirement already satisfied: jinja2>=3.0 in c:\school\spring2024\spring2024env\lib\site-packages (from nbconvert) (3.1.3)
Requirement already satisfied: jupyter-core>=4.7 in c:\school\spring2024\spring2024env\lib\site-packages (from nbconvert) (5.7.1)
Requirement already satisfied: jupyterlab-pygments in c:\school\spring2024\spring2024env\lib\site-packages (from nbconvert) (0.3.0)
Requirement already satisfied: markupsafe>=2.0 in c:\school\spring2024\spring2024env\lib\site-packages (from nbconvert) (2.1.4)
Requirement already satisfied: mistune<4,>=2.0.3 in c:\school\spring2024\spring2024env\lib\site-packages (from nbconvert) (3.0.2)
Requirement already satisfied: nbclient>=0.5.0 in c:\school\spring2024\spring2024env\lib\site-packages (from nbconvert) (0.9.0)
Requirement already satisfied: nbformat>=5.7 in c:\school\spring2024\spring2024env\lib\site-packages (from nbconvert) (5.9.2)
Requirement already satisfied: packaging in c:\school\spring2024\spring2024env\lib\site-packages (from nbconvert) (23.2)
Requirement already satisfied: pandocfilters>=1.4.1 in c:\school\spring2024\spring2024env\lib\site-packages (from nbconvert) (1.5.1)
Requirement already satisfied: pygments>=2.4.1 in c:\school\spring2024\spring2024env\lib\site-packages (from nbconvert) (2.17.2)
Requirement already satisfied: tinycss2 in c:\school\spring2024\spring2024env\lib\site-packages (from nbconvert) (1.2.1)
Requirement already satisfied: traitlets>=5.1 in c:\school\spring2024\spring2024env\lib\site-packages (from nbconvert) (5.14.1)
Requirement already satisfied: six>=1.9.0 in c:\school\spring2024\spring2024env\lib\site-packages (from bleach!=5.0.0->nbconvert) (1.16.0)
Requirement already satisfied: webencodings in c:\school\spring2024\spring2024env\lib\site-packages (from bleach!=5.0.0->nbconvert) (0.5.1)
Requirement already satisfied: platformdirs>=2.5 in c:\school\spring2024\spring2024env\lib\site-packages (from jupyter-core>=4.7->nbconvert) (4.1.0)
Requirement already satisfied: pywin32>=300 in c:\school\spring2024\spring2024env\lib\site-packages (from jupyter-core>=4.7->nbconvert) (306)
Requirement already satisfied: jupyter-client>=6.1.12 in c:\school\spring2024\spring2024env\lib\site-packages (from nbclient>=0.5.0->nbconvert) (8.6.0)
Requirement already satisfied: fastjsonschema in c:\school\spring2024\spring2024env\lib\site-packages (from nbformat>=5.7->nbconvert) (2.19.1)
Requirement already satisfied: jsonschema>=2.6 in c:\school\spring2024\spring2024env\lib\site-packages (from nbformat>=5.7->nbconvert) (4.21.1)
Requirement already satisfied: soupsieve>1.2 in c:\school\spring2024\spring2024env\lib\site-packages (from beautifulsoup4->nbconvert) (2.5)
Requirement already satisfied: attrs>=22.2.0 in c:\school\spring2024\spring2024env\lib\site-packages (from jsonschema>=2.6->nbformat>=5.7->nbconvert) (23.2.0)
Requirement already satisfied: jsonschema-specifications>=2023.03.6 in c:\school\spring2024\spring2024env\lib\site-packages (from jsonschema>=2.6->nbformat>=5.7->nbconvert) (2023.12.1)
Requirement already satisfied: referencing>=0.28.4 in c:\school\spring2024\spring2024env\lib\site-packages (from jsonschema>=2.6->nbformat>=5.7->nbconvert) (0.33.0)
Requirement already satisfied: rpds-py>=0.7.1 in c:\school\spring2024\spring2024env\lib\site-packages (from jsonschema>=2.6->nbformat>=5.7->nbconvert) (0.17.1)
Requirement already satisfied: python-dateutil>=2.8.2 in c:\school\spring2024\spring2024env\lib\site-packages (from jupyter-client>=6.1.12->nbclient>=0.5.0->nbconvert) (2.8.2)
Requirement already satisfied: pyzmq>=23.0 in c:\school\spring2024\spring2024env\lib\site-packages (from jupyter-client>=6.1.12->nbclient>=0.5.0->nbconvert) (25.1.2)
Requirement already satisfied: tornado>=6.2 in c:\school\spring2024\spring2024env\lib\site-packages (from jupyter-client>=6.1.12->nbclient>=0.5.0->nbconvert) (6.4)
Downloading nbconvert-7.16.4-py3-none-any.whl (257 kB)
   ---------------------------------------- 0.0/257.4 kB ? eta -:--:--
   ----------- ---------------------------- 71.7/257.4 kB 1.9 MB/s eta 0:00:01
   ---------------------------------------- 257.4/257.4 kB 3.9 MB/s eta 0:00:00
Installing collected packages: nbconvert
Successfully installed nbconvert-7.16.4
Note: you may need to restart the kernel to use updated packages.
[notice] A new release of pip is available: 24.0 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip
In [24]:
#save entire notebook to html 

import os
import nbformat
from nbconvert import HTMLExporter

notebook_filename = 'C:\\New_499_Code\\health_analysis2.html'
In [ ]:
#export the notebook to html

html_exporter = HTMLExporter()
content, info = html_exporter.from_filename('C:\\New_499_Code\\health_analysis.ipynb')
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
Cell In[28], line 4
      1 #export the notebook to html
      3 html_exporter = HTMLExporter()
----> 4 content, info = html_exporter.from_filename('C:\\New_499_Code\\health_analysis3.ipynb')

File c:\School\Spring2024\Spring2024Env\lib\site-packages\nbconvert\exporters\templateexporter.py:386, in TemplateExporter.from_filename(self, filename, resources, **kw)
    382 def from_filename(  # type:ignore[override]
    383     self, filename: str, resources: dict[str, t.Any] | None = None, **kw: t.Any
    384 ) -> tuple[str, dict[str, t.Any]]:
    385     """Convert a notebook from a filename."""
--> 386     return super().from_filename(filename, resources, **kw)

File c:\School\Spring2024\Spring2024Env\lib\site-packages\nbconvert\exporters\exporter.py:191, in Exporter.from_filename(self, filename, resources, **kw)
    187 resources["metadata"]["name"] = notebook_name
    188 resources["metadata"]["path"] = path
    190 modified_date = datetime.datetime.fromtimestamp(
--> 191     os.path.getmtime(filename), tz=datetime.timezone.utc
    192 )
    193 # datetime.strftime date format for ipython
    194 if sys.platform == "win32":

File C:\Python310\lib\genericpath.py:55, in getmtime(filename)
     53 def getmtime(filename):
     54     """Return the last modification time of a file, reported by os.stat()."""
---> 55     return os.stat(filename).st_mtime

FileNotFoundError: [WinError 2] The system cannot find the file specified: 'C:\\New_499_Code\\health_analysis3.ipynb'
In [ ]: